# ===============================================================#
#                     Replication files for:                     #
#   "Attitudinal and Behavioral Legacies of Wartime Violence:    #
#                      A Meta-Analysis"                          #
#                        Joan Barceló                            #
#               American Political Science Review                #
#               Last update: September 3, 2025                   #
# ===============================================================#

#################################
# Figure 1: Descriptive statistics
################################

## ------------------------ Load Packages ------------------------

library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)

# ---------- Paths and file maps ----------

# Directory that holds the per-outcome meta-analysis CSV files. The value is
# pasted directly in front of each file name, so the trailing slash is needed.
base_path_meta    <- "~/Datasets/"

## ------------------------ Load data ------------------------

# Read one CSV file located in `base_path_meta`, using read.csv() defaults.
read_meta <- function(file_name) {
  read.csv(paste0(base_path_meta, file_name))
}

meta.groups            <- read_meta("meta.groups.csv")
meta.participation     <- read_meta("meta.political_participation.csv")
meta.leadership        <- read_meta("meta.leadership.csv")
meta.interest          <- read_meta("meta.political_interest.csv")
meta.turnout           <- read_meta("meta.voting.csv")
meta.trust             <- read_meta("meta.generalized_trust.csv")
meta.altruism          <- read_meta("meta.altruism.csv")
meta.normativegames    <- read_meta("meta.normative.csv")

meta.warenemies        <- read_meta("meta.warenemies.csv")
meta.threat            <- read_meta("meta.threat.csv")
meta.intergroup        <- read_meta("meta.intergroup.csv")

meta.ingroup_trust     <- read_meta("meta.ingroup_trust.csv")
meta.groupid           <- read_meta("meta.groupid.csv")
meta.groupvoting       <- read_meta("meta.groupvoting.csv")

meta.polintol          <- read_meta("meta.political_intolerance.csv")
meta.socintol          <- read_meta("meta.social_intolerance.csv")
meta.autho             <- read_meta("meta.authoritarian.csv")
meta.institutional     <- read_meta("meta.instmistrust.csv")
meta.security          <- read_meta("meta.hawkish.csv")
meta.punitive          <- read_meta("meta.punitive.csv")
meta.antipeace         <- read_meta("meta.antipeace.csv")
meta.xtr_ideology      <- read_meta("meta.xtrideology.csv")

# Columns that every outcome table is expected to provide. Each table is
# indexed with this vector below, so a table lacking one of them stops the
# script.
keep_cols <- c(
  "authoryear", "coef", "se", "n", "country", "year",
  "did_design", "uesd_design", "iv_design", "rd_design", "random_design",
  "exptype", "exptype2", "ExposureLagMean", "UnitAnalysis"
)

# Stack the shared columns of all outcome tables into one long table, then
# keep only the study-level descriptors (coef, se and n are dropped here).
meta.all.items <- list(
  meta.groups,
  meta.participation,
  meta.leadership,
  meta.interest,
  meta.turnout,
  meta.trust,
  meta.altruism,
  meta.normativegames,
  meta.warenemies,
  meta.threat,
  meta.intergroup,
  meta.ingroup_trust,
  meta.groupid,
  meta.groupvoting,
  meta.polintol,
  meta.socintol,
  meta.autho,
  meta.institutional,
  meta.security,
  meta.punitive,
  meta.antipeace,
  meta.xtr_ideology
) %>%
  lapply(function(tbl) tbl[, keep_cols]) %>%
  dplyr::bind_rows() %>%
  select(
    authoryear, country, year, exptype, exptype2,
    did_design, uesd_design, iv_design, rd_design, random_design,
    ExposureLagMean, UnitAnalysis
  )

# One row per (authoryear, country) pair; the first row encountered is kept.
meta.all.items_nodup <- distinct(
  meta.all.items,
  authoryear, country,
  .keep_all = TRUE
)

# Generate descriptive summary
descriptive_nodup <- mutate(
  meta.all.items_nodup,
  pub_year  = str_extract(authoryear, "\\d{4}"),  # first 4-digit run, as text
  data_year = as.numeric(year),
  country   = as.factor(country)
)

# Exposure-treat specific sets
# One row per (authoryear, country, exptype, exptype2) combination, taken
# from the full stacked table.
meta.all.items_nodup_exptreat <- distinct(
  meta.all.items,
  authoryear, country, exptype, exptype2,
  .keep_all = TRUE
)

# Same key applied to the already deduplicated table. Because each
# (authoryear, country) pair occurs only once there, this keeps every row.
# NOTE(review): not referenced anywhere else in the visible script.
meta.all.items_nodup_design <- distinct(
  meta.all.items_nodup,
  authoryear, country, exptype, exptype2,
  .keep_all = TRUE
)

# Descriptive 1, Publication Year
# Histogram of publication years with one-year bins. `pub_year` is converted
# to numeric inside aes(); rows whose value cannot be converted are dropped
# by ggplot2.
hist_year <- ggplot(descriptive_nodup, aes(x = as.numeric(pub_year))) +
  geom_histogram(binwidth = 1, fill = "black", color = "black") +
  theme_minimal() +
  labs(x = "Publication Year", y = "Number of studies") +
  theme(
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # The labels are horizontal, so centre them on their ticks. The previous
    # hjust = 1 right-aligned each label, placing it to the left of its tick.
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16, margin = margin(t = 4)),
    axis.title.y = element_text(size = 16)
  )

print(hist_year)

# Descriptive 2, Country of study
# Count deduplicated rows per country, pool every country with fewer than
# five rows into a single "Others" bucket, sort by size, and title-case the
# names.
country_counts <- descriptive_nodup %>%
  dplyr::count(country) %>%
  mutate(country = ifelse(n < 5, "Others", as.character(country))) %>%
  dplyr::group_by(country) %>%
  dplyr::summarise(n = sum(n), .groups = "drop") %>%
  arrange(desc(n)) %>%
  mutate(country = str_to_title(country))

# Title-casing cannot restore the space in run-together names; patch them.
country_counts[country_counts$country == "Sierraleone", "country"] <- "Sierra Leone"
country_counts[country_counts$country == "Northernireland", "country"] <- "Northern Ireland"

# The pooled "Others" bucket is not shown in the chart.
named_countries <- country_counts[country_counts$country != "Others", ]

hist_country <- ggplot(named_countries, aes(x = reorder(country, -n), y = n)) +
  geom_col(fill = "black", color = "black") +
  # One tick per whole number inside the axis limits.
  scale_y_continuous(
    breaks = function(limits) seq(ceiling(limits[1]), floor(limits[2]), by = 1)
  ) +
  theme_minimal() +
  labs(x = "Country", y = "Number of studies") +
  theme(
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16, margin = margin(t = 15)),
    axis.title.y = element_text(size = 16)
  )

print(hist_country)

# Descriptive 3, Exposure Lag
# Convert the lag to numeric and top-code it at 80, so that every larger
# value lands in the final bin, which is labelled "80+". Missing values
# stay missing.
meta.all.items_nodup$ExposureLagMean <- pmin(
  as.numeric(meta.all.items_nodup$ExposureLagMean),
  80
)

summary(meta.all.items_nodup$ExposureLagMean)

# Share of rows with a lag above 20 years. The denominator counts every row,
# including rows whose lag is missing.
# NOTE(review): confirm that missing lags are meant to stay in the denominator.
sum(meta.all.items_nodup$ExposureLagMean > 20, na.rm = TRUE) /
  length(meta.all.items_nodup$ExposureLagMean)

# One-year bins from 0 to 80. No open-ended bin is needed: the data are
# top-coded at 80 above, so a bin above 80 could never receive a value.
breaks <- seq(0, 80, by = 1)
label_breaks <- c(seq(0, 70, by = 10), 80)
label_names  <- c(as.character(seq(0, 70, by = 10)), "80+")

hist_timelag <- ggplot(meta.all.items_nodup, aes(x = ExposureLagMean)) +
  geom_histogram(breaks = breaks, fill = "black", color = "black") +
  scale_x_continuous(breaks = label_breaks, labels = label_names, limits = c(0, NA)) +
  theme_minimal() +
  labs(x = "Time lag between conflict exposure\nand outcome assessment (in years)", y = "Frequency") +
  theme(
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16, margin = margin(t = 15)),
    axis.title.y = element_text(size = 16)
  )

print(hist_timelag)

# Descriptive 4, Type of Violence Measures
# Count rows per (exptype, exptype2) combination and translate the codes into
# display labels. case_when() is used instead of logical-index assignment so
# that missing codes do not abort the script; combinations not covered by a
# rule keep their original exptype2 value.
exposure_treat_count <- meta.all.items_nodup_exptreat %>%
  dplyr::count(exptype, exptype2) %>%
  arrange(desc(n)) %>%
  mutate(
    exptype2 = case_when(
      exptype2 == "o" ~ "Objective\nexposure",
      exptype == "N" & exptype2 == "s" ~ "Self-reported\nexposure",
      exptype == "Y" & exptype2 == "s" ~ "Experimental\nprimes",
      TRUE ~ as.character(exptype2)
    )
  )

# Several (exptype, exptype2) rows can share one display label (every "o" row
# gets the same label whatever its exptype). Sum them so each label is drawn
# as a single bar and bars are ordered by their total, not by the mean of
# their segments.
exposure_treat_plot <- exposure_treat_count %>%
  dplyr::count(exptype2, wt = n, name = "n") %>%
  arrange(desc(n))

hist_exposure_treat <- ggplot(exposure_treat_plot, aes(x = reorder(exptype2, -n), y = n)) +
  geom_bar(stat = "identity", fill = "black", color = "gray50") +
  theme_minimal() +
  labs(x = "Type of exposure treatment", y = "Frequency") +
  theme(
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 16, margin = margin(t = 10, b = 10)),
    axis.title.y = element_text(size = 16)
  )

print(hist_exposure_treat)

# Descriptive 5, Unit of analysis
# Tabulate the free-text unit-of-analysis labels (missing values are not
# tabulated), then classify each distinct label by pattern matching.
unit_data <- as.data.frame(table(meta.all.items_nodup$UnitAnalysis))
colnames(unit_data) <- c("UnitAnalysis", "Count")

# Case-insensitive patterns; the misspelt alternatives are matched as written.
individual_pattern <- regex("Individual|Inidividual|Inividual|individual|Household", ignore_case = TRUE)
cluster_pattern <- regex("cluster", ignore_case = TRUE)
aggregate_pattern <- regex("Chiefdom|Community|Locality|District|Municipality|Municipalities|Municipailty|Neighbourhood|Village|Hamlet|County", ignore_case = TRUE)

# Each column holds the label's frequency when the label matches and 0
# otherwise. A label mentioning "cluster" never counts as aggregate. A label
# may match both classes, and a label matching neither adds to no total.
unit_data <- unit_data %>%
  mutate(
    individual = as.numeric(str_detect(UnitAnalysis, individual_pattern)) * Count,
    aggregate = as.numeric(
      !str_detect(UnitAnalysis, cluster_pattern) &
        str_detect(UnitAnalysis, aggregate_pattern)
    ) * Count
  )

sum_unit <- data.frame(
  Category = c("Individual", "Aggregate"),
  Count = c(sum(unit_data$individual), sum(unit_data$aggregate))
)

plot_sum_unit <- ggplot(sum_unit, aes(x = Category, y = Count, fill = Category)) +
  geom_col() +
  labs(x = "Unit of analysis", y = "Number of studies") +
  scale_fill_manual(values = c("Individual" = "black", "Aggregate" = "gray30")) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.title.x = element_text(size = 14, margin = margin(t = 10)),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  )

print(plot_sum_unit)

# Descriptive 6, Research designs
# Studies (distinct authoryear) with at least one row in which all five
# design indicators equal "N".
no_design_studies <- meta.all.items %>%
  filter(
    did_design == "N" & uesd_design == "N" & iv_design == "N" &
      rd_design == "N" & random_design == "N"
  ) %>%
  distinct(authoryear) %>%
  mutate(Design = "None", n = 1)

# Number of distinct studies flagged "Y" for each design. A study flagged
# for several designs is counted once under each of them.
design_counts <- meta.all.items %>%
  select(authoryear, did_design, uesd_design, iv_design, rd_design, random_design) %>%
  tidyr::pivot_longer(
    cols = ends_with("_design"),
    names_to = "Design",
    values_to = "Used"
  ) %>%
  filter(Used == "Y") %>%
  distinct(authoryear, Design) %>%
  dplyr::count(Design)

none_count <- dplyr::count(no_design_studies, Design)
design_counts <- bind_rows(design_counts, none_count)

# Axis labels for each design code.
design_labels <- c(
  did_design = "DID",
  uesd_design = "ITS",
  iv_design = "IV",
  rd_design = "RDD",
  random_design = "Other\nrandom\ndesign",
  None = "No causal\ndesign"
)

design_counts <- design_counts %>%
  mutate(Label = design_labels[Design])

# NOTE(review): 174 and 126 are hard-coded and not derived from any object in
# this script -- presumably study counts; confirm their source and compute
# them from the data instead.
(174 - 126) / sum(design_counts$n)

plot_designs <- ggplot(design_counts, aes(x = reorder(Label, -n), y = n, fill = Label)) +
  geom_col() +
  labs(x = "Research designs", y = "Number of studies") +
  # Every bar is black; fill is mapped only so each label is its own group.
  scale_fill_manual(values = rep("black", nrow(design_counts))) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 0, size = 12),
    axis.title.x = element_text(size = 14, margin = margin(t = 10)),
    axis.title.y = element_text(size = 14),
    axis.text.y = element_text(size = 12)
  )

print(plot_designs)

# Assemble the six summary plots into one three-row, two-column figure.
# The figure is only printed to the active graphics device; this section
# does not write a file.
library(patchwork)

# Title styling applied to every panel of a row through patchwork's `&`.
panel_title_theme <- theme(plot.title = element_text(size = 12, face = "bold"))

top_row    <- (hist_year + hist_country + plot_layout(ncol = 2)) & panel_title_theme
middle_row <- (hist_timelag + hist_exposure_treat + plot_layout(ncol = 2)) & panel_title_theme
bottom_row <- (plot_sum_unit + plot_designs + plot_layout(ncol = 2)) & panel_title_theme

# Stack the rows first, then set the row heights and the (empty) overall
# title on the stacked figure.
full_figure <- (top_row / middle_row / bottom_row) +
  plot_layout(heights = c(1, 1, 1.1)) +
  plot_annotation(
    title = "",
    theme = theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))
  )

print(full_figure)